In [1]:
# Load the "autoreload" extension so that edits to project modules are picked up
# without restarting the kernel.
%load_ext autoreload
# always reload modules marked with "%aimport"
# (mode 1: only %aimport-ed modules are reloaded before each cell runs)
%autoreload 1
import os
import sys
# Add the project's 'src/data' directory as one where we can import modules.
# The path is built relative to the kernel's working directory: <cwd>/../src/data.
src_dir = os.path.join(os.getcwd(), os.pardir, 'src', 'data')
# Guard the append so that re-running this cell does not pile up duplicate
# entries on sys.path.
if src_dir not in sys.path:
    sys.path.append(src_dir)
In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn import metrics
from tqdm import tqdm
In [3]:
%aimport scrape_buda
In [4]:
# Directory holding intermediate data files: <cwd>/../data/interim, built
# relative to the kernel's current working directory.
interim_dir = os.path.join(
    os.getcwd(),
    os.pardir,
    'data',
    'interim',
)
In [9]:
# Directory that saved figures are written to: <cwd>/../reports/figures, built
# relative to the kernel's current working directory.
figures_dir = os.path.join(
    os.getcwd(),
    os.pardir,
    'reports',
    'figures',
)
In [5]:
from scrape_buda import BudaRating
In [6]:
# Path prefix inside the interim data directory that is handed to load_buda().
# NOTE(review): the name suggests a 2016-05-21 data snapshot -- confirm.
prefix = os.path.join(interim_dir, 'data20160521')

# Create the rating object via the module attribute (scrape_buda is %aimport-ed,
# so this picks up reloaded code), then load the data stored under `prefix`.
ratings = scrape_buda.BudaRating()
ratings.load_buda(prefix)
In [116]:
# Run the rating-prediction step on the loaded data.
# NOTE(review): predicted_rating() is defined in scrape_buda (not visible here);
# the following cells read ratings.allteams, so presumably it fills in or updates
# that frame -- confirm.
ratings.predicted_rating()
In [24]:
# Cache the team table in the interim data directory.
# NOTE(review): to_csv writes the index by default, and the cells that read this
# file back use plain pd.read_csv, so the reloaded frame will presumably carry an
# extra unnamed index column -- confirm whether that is intended.
ratings.allteams.to_csv(os.path.join(interim_dir, 'withselfcaptain_ratings.csv'))
In [7]:
# Replace the in-memory team table with the copy cached on disk.
# NOTE(review): presumably this lets a fresh kernel skip predicted_rating() -- confirm.
ratings.allteams = pd.read_csv(os.path.join(interim_dir, 'withselfcaptain_ratings.csv'))
In [15]:
sns.set_context('poster')
sns.set_style('white')

# For each year 2010-2016, plot plus/minus per game against the four rating
# columns for Spring Hat teams in the 'JP Mixed (4/3)' division, and save one
# figure per year as RatingComparison_<year> in figures_dir.
for year in range(2010, 2017):
    index = (ratings.allteams['year'] == year) & \
        (ratings.allteams['type'] == 'Hat') & \
        (ratings.allteams['season'] == 'Spring') & \
        (ratings.allteams['divname'] == 'JP Mixed (4/3)')
    # .copy() gives an independent frame, so adding a column below does not
    # assign into a slice of ratings.allteams (avoids SettingWithCopyWarning).
    okdf = ratings.allteams[index].copy()
    okdf['experience_converted'] = scrape_buda.experience_to_self(okdf['experience_rating'])
    # Human-readable column names double as axis labels in the grid.
    okdf = okdf.rename(columns={'self_rating': 'Self Rating',
                                'captain_rating': 'Captain Rating',
                                'draft_rating': 'Draft Rating',
                                'experience_converted': 'Experience Rating',
                                'plusminus': 'Plus/Minus per Game'})
    # NOTE(review): `size` was renamed `height` in seaborn 0.9; it is kept here
    # for the seaborn version this notebook was written against.
    g = sns.PairGrid(okdf, y_vars=["Plus/Minus per Game"],
                     x_vars=["Self Rating", "Captain Rating", "Draft Rating", "Experience Rating"],
                     size=4)
    g.map(sns.regplot, color=".3")
    # Same axis limits every year so the saved figures are directly comparable.
    g.set(ylim=(-10, 10), xlim=(40, 60), title=year)
    plt.savefig(os.path.join(figures_dir, 'RatingComparison_{}'.format(year)))
In [24]:
sns.set_context('poster')
sns.set_style('white')

# Pool all years 2010-2016 of Spring Hat teams in the 'JP Mixed (4/3)' division.
index = (ratings.allteams['year'] < 2017) & (ratings.allteams['year'] >= 2010) & \
    (ratings.allteams['type'] == 'Hat') & \
    (ratings.allteams['season'] == 'Spring') & \
    (ratings.allteams['divname'] == 'JP Mixed (4/3)')
# .copy() gives an independent frame, so adding a column below does not assign
# into a slice of ratings.allteams (avoids SettingWithCopyWarning).
sph = ratings.allteams[index].copy()
sph['experience_converted'] = scrape_buda.experience_to_self(sph['experience_rating'])
# Human-readable column names double as axis labels in the joint plots.
sph = sph.rename(columns={'self_rating': 'Self Rating',
                          'captain_rating': 'Captain Rating',
                          'draft_rating': 'BUDA Rating',
                          'experience_converted': 'Club Rating',
                          'plusminus': 'Plus/Minus per Game'})

# The four figures differ only in the x column and the output file name, so they
# are drawn in one loop instead of four copy-pasted calls.
# NOTE(review): `size` was renamed `height` in seaborn 0.9; it is kept here for
# the seaborn version this notebook was written against.
for rating_label, figure_name in [('Self Rating', 'SelfRatingComparison'),
                                  ('Captain Rating', 'CaptainRatingComparison'),
                                  ('BUDA Rating', 'BUDARatingComparison'),
                                  ('Club Rating', 'ClubRatingComparison')]:
    g = sns.jointplot(x=rating_label, y='Plus/Minus per Game', data=sph, kind='reg',
                      xlim=[40, 60], size=6, ylim=[-15, 15])
    plt.savefig(os.path.join(figures_dir, figure_name))
In [176]:
# Spot check: show the 'type' value of the first row whose teamid is 34600.
# Raises IndexError if no row has that team id.
ratings.allteams.loc[ratings.allteams['teamid'] == 34600, 'type'].values[0]
Out[176]:
In [64]:
# Load the self/captain ratings table from the interim data directory.
# Later cells read its 'rank' and 'captain_rank' columns.
self_ratings = pd.read_csv(os.path.join(interim_dir, 'data20160521_selfcaptain_ratings.csv'))
In [65]:
# Inspect which columns the self/captain ratings table provides.
self_ratings.columns
Out[65]:
In [113]:
# Derive a self rank from the stored columns: self_rank = 2 * rank - captain_rank.
# NOTE(review): this presumably assumes 'rank' is the mean of the self and captain
# ranks -- confirm against scrape_buda.
self_ratings['self_rank'] = 2 * self_ratings['rank'] - self_ratings['captain_rank']
# Keep only rows whose derived self rank falls inside the 0-100 range.
ok = (self_ratings['self_rank'] >= 0) & (self_ratings['self_rank'] <= 100)
# Pass the data as x=/y= keywords: seaborn 0.12+ makes them keyword-only, and the
# keyword form also works on older releases.
sns.jointplot(x=self_ratings.loc[ok, 'self_rank'], y=self_ratings.loc[ok, 'captain_rank'],
              kind='kde', xlim=[0, 100], ylim=[0, 100])
Out[113]:
In [26]:
# Load the cached team table into a standalone frame, independent of `ratings`.
dfallteams = pd.read_csv(os.path.join(interim_dir, 'withselfcaptain_ratings.csv'))
In [8]:
# Inspect which columns the cached team table provides.
dfallteams.columns
Out[8]:
In [10]:
# Boolean row mask: 2011 Spring Hat teams in the 'JP Mixed (4/3)' division.
ok = (
    dfallteams['divname'].eq('JP Mixed (4/3)')
    & dfallteams['season'].eq('Spring')
    & dfallteams['type'].eq('Hat')
    & dfallteams['year'].eq(2011)
)
In [12]:
# Rows of the cached team table selected by the `ok` mask.
# NOTE(review): the next cell rebinds sph2011 to ratings.validate_rating(), so this
# selection is discarded when the cells run top to bottom -- confirm which is wanted.
sph2011 = dfallteams[ok]
In [21]:
# Build the validation table; it is written to validate_sph2011.csv below.
# NOTE(review): validate_rating() is defined in scrape_buda (not visible here);
# presumably it returns the Spring 2011 Hat ratings to compare with the
# captain's spreadsheet -- confirm.
sph2011 = ratings.validate_rating()
In [22]:
# Save the validation table to the interim data directory so it can be compared
# with the spreadsheet outside the notebook.
sph2011.to_csv(os.path.join(interim_dir, 'validate_sph2011.csv'))
Validation is confirmed: the captain ratings, self ratings, and draft ratings I calculated from the database match those in the spreadsheet from when I was a captain on Skynet (Spring Hat League, 2011, JP Mixed).
In [31]:
# Average the ratings per (rank_type, user_id) pair; the result is indexed by
# those two keys, so .loc[<rank_type>, 'rank'] below selects one rating type.
selfrating = ratings.self_ratings.groupby(['rank_type', 'user_id']).mean()
In [34]:
# Mean 'rank' per user for rank_type 1, with missing values dropped.
# NOTE(review): presumably rank_type 1 means players' self ratings -- confirm.
indivrating = selfrating.loc[1, 'rank'].dropna()
In [35]:
# Number of users with a non-missing rank_type 1 rating.
len(indivrating)
Out[35]:
In [36]:
# Mean 'rank' per user for rank_type 2, with missing values dropped.
# NOTE(review): presumably rank_type 2 means captain-assigned ratings -- confirm.
captainrating = selfrating.loc[2, 'rank'].dropna()
In [37]:
# Number of users with a non-missing rank_type 2 rating.
len(captainrating)
Out[37]:
In [38]:
# NOTE(review): hard-coded counts. Presumably these are len(captainrating) and
# len(indivrating) copied from the outputs above; confirm, and prefer computing
# the ratio from those variables so it stays correct when the data changes.
# The trailing '.' makes this a float division.
3692 / 8181.
Out[38]:
In [ ]: